<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="zh-Hans">
  <head>

    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta content="Cask Data, Inc." name="author" />
<meta content="Copyright © 2017 Cask Data, Inc." name="copyright" />
<meta content="The CDAP User Guide: Getting Started" name="description" />


    <meta name="git_release" content="6.1.1">
    <meta name="git_hash" content="05fbac36f9f7aadeb44f5728cea35136dbc243e5">
    <meta name="git_timestamp" content="2020-02-09 08:22:47 +0800">
    <title>示例: 使用客户地址信息分发市场推广材料</title>

    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/bootstrap-3.3.6/css/bootstrap-theme.min.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/bootstrap-sphinx.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-dynamicscrollspy-4.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-jquery.mCustomScrollbar.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/abixTreeList-2.css" type="text/css" />
    <link rel="stylesheet" href="../_static/cdap-bootstrap.css" type="text/css" />
    <link rel="stylesheet" href="../_static/css/cdap-hide-toc.css" type="text/css" />

    <script type="text/javascript">
      var DOCUMENTATION_OPTIONS = {
        URL_ROOT:    '',
        VERSION:     '6.1.1',
        COLLAPSE_INDEX: false,
        FILE_SUFFIX: '.html',
        HAS_SOURCE:  false
      };
    </script>
    <script type="text/javascript" src="../_static/jquery.js"></script>
    <script type="text/javascript" src="../_static/underscore.js"></script>
    <script type="text/javascript" src="../_static/doctools.js"></script>
    <script type="text/javascript" src="../_static/language_data.js"></script>

    <link rel="shortcut icon" href="../_static/favicon.ico"/>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="top" title="Cask Data Application Platform 6.1.1 Documentation" href="../index.html" />
    <link rel="up" title="CDAP 入门指南" href="index.html" />
    <link rel="next" title="示例: 使用纽约时报 XML 数据推送" href="nytimes-xml.html" />
    <link rel="prev" title="CDAP 入门指南" href="index.html" />
    <!-- block extrahead -->
    <meta charset='utf-8'>
    <meta http-equiv='X-UA-Compatible' content='IE=edge,chrome=1'>
    <meta name='viewport' content='width=device-width, initial-scale=1.0'>
    <meta name="apple-mobile-web-app-capable" content="yes">
    <!-- block extrahead end -->

</head>
<body role="document">

<!-- block navbar -->
<div id="navbar" class="navbar navbar-inverse navbar-default navbar-fixed-top">
    <div class="container-fluid">
      <div class="row">
        <div class="navbar-header">
          <!-- .btn-navbar is used as the toggle for collapsed navbar content -->
          <a class="navbar-brand" href="../table-of-contents/../../index.html">
            <span><img alt="CDAP logo" src="../_static/cdap_logo.svg"/></span>
          </a>

          <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".nav-collapse">
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
            <span class="icon-bar"></span>
          </button>

          <div class="pull-right">
            <div class="dropdown version-dropdown">
              <a href="#" class="dropdown-toggle" data-toggle="dropdown"
                role="button" aria-haspopup="true" aria-expanded="false">
                v 6.1.1 <span class="caret"></span>
              </a>
              <ul class="dropdown-menu">
                <li><a href="//docs.cdap.io/cdap/5.1.2/en/index.html">v 5.1.2</a></li>
                <li><a href="//docs.cdap.io/cdap/4.3.4/en/index.html">v 4.3.4</a></li>
              </ul>
            </div>
          </div>
          <form class="navbar-form navbar-right navbar-search" action="../search.html" method="get">
            <div class="form-group">
              <div class="navbar-search-image material-icons"></div>
              <input type="text" name="q" class="form-control" placeholder="  Search" />
            </div>
            <input type="hidden" name="check_keywords" value="yes" />
            <input type="hidden" name="area" value="default" />
          </form>

          <div class="collapse navbar-collapse nav-collapse navbar-right navbar-navigation">
            <ul class="nav navbar-nav"><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../index.html">简介</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link current" href="../table-of-contents/../../guides.html">手册</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../reference-manual/index.html">参考</a></li><li class="docsite-nav-tab-container"><a class="docsite-nav-tab-link " href="../table-of-contents/../../faqs/index.html">帮助</a></li>
            </ul>
          </div>

        </div>
      </div>
    </div>
  </div><!-- block navbar end -->
<!-- block main content -->
<div class="main-container container">
  <div class="row"><div class="col-md-2">
      <div id="sidebar" class="bs-sidenav scrollable-y-outside" role="complementary">
<!-- theme_manual: user-guide -->
<!-- theme_manual_highlight: guides -->
<!-- sidebar_title_link: ../table-of-contents/../../guides.html -->

  <div role="note" aria-label="manuals links"><h3><a href="../table-of-contents/../../guides.html">Guides</a></h3>

    <ul class="this-page-menu">
      <li class="toctree-l1"><b><a href="../table-of-contents/../../user-guide/index.html" rel="nofollow">用户手册</a></b>
      <nav class="pagenav">
      <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html"> 简介</a></li>
<li class="toctree-l1"><a class="reference internal" href="../overview.html"> 概述</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="index.html"> 入门指南</a><ul class="current">
<li class="toctree-l2 current"><a class="current reference internal" href="#">MySQL 客户数据</a></li>
<li class="toctree-l2"><a class="reference internal" href="nytimes-xml.html">纽约时报 XML 数据推送</a></li>
<li class="toctree-l2"><a class="reference internal" href="stocks.html">股票选择</a></li>
<li class="toctree-l2"><a class="reference internal" href="fitbit.html">物联网 IoT 设备数据</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../data-preparation/index.html"> 数据预处理</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/concepts.html">      概念</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/directives/index.html">      数据处理指令</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/catalog-lookup.html">catalog-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/change-column-case.html">change-column-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/changing-case.html">changing-case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cleanse-column-names.html">cleanse-column-names</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/columns-replace.html">columns-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/copy.html">copy</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/cut-character.html">cut-character</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/decode.html">decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/diff-date.html">diff-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/drop.html">drop</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/encode.html">encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/extract-regex-groups.html">extract-regex-groups</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fail.html">fail</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/fill-null-or-empty.html">fill-null-or-empty</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-matched.html">filter-row-if-matched</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-row-if-true.html">filter-row-if-true</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/filter-rows-on.html">filter-rows-on</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/find-and-replace.html">find-and-replace</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/flatten.html">flatten</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-date.html">format-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/format-unix-timestamp.html">format-unix-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/generate-uuid.html">generate-uuid</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/hash.html">hash</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/increment-variable.html">increment-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/index-split.html">index-split</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/invoke-http.html">invoke-http</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/json-path.html">json-path</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/keep.html">keep</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-number.html">mask-number</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/mask-shuffle.html">mask-shuffle</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/merge.html">merge</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro-file.html">parse-as-avro-file</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-avro.html">parse-as-avro</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-csv.html">parse-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-date.html">parse-as-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-excel.html">parse-as-excel</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-fixed-length.html">parse-as-fixed-length</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-hl7.html">parse-as-hl7</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-json.html">parse-as-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-log.html">parse-as-log</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-simple-date.html">parse-as-simple-date</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-as-xml.html">parse-as-xml</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-timestamp.html">parse-timestamp</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/parse-xml-to-json.html">parse-xml-to-json</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/quantize.html">quantize</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/rename.html">rename</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/send-to-error.html">send-to-error</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-charset.html">set-charset</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-column.html">set-column</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-columns.html">set-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-record-delim.html">set-record-delim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-type.html">set-type</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/set-variable.html">set-variable</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-by-separator.html">split-by-separator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-email.html">split-email</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-columns.html">split-to-columns</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-to-rows.html">split-to-rows</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/split-url.html">split-url</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/stemming.html">stemming</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/swap.html">swap</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/table-lookup.html">table-lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-distance.html">text-distance</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/text-metric.html">text-metric</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/trim.html">trim</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-decode.html">url-decode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/url-encode.html">url-encode</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-csv.html">write-as-csv</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-map.html">write-as-json-map</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/write-as-json-object.html">write-as-json-object</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/directives/xpath.html">xpath</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/functions/index.html">      函数</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/json-functions.html">JSON 函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/type-functions.html">类型函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/geofence-functions.html">地理围栏函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/dq-functions.html">数据质量函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/date-functions.html">日期函数</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/functions/ddl-functions.html">DDL 函数</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/service/index.html">      服务</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/admin.html">行政和管理服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connection-properties.html">连接属性</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/connections.html">连接服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/execution.html">数据处理指令执行</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/request.html">请求格式规范</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/schema-registry.html">Schema 注册库</a></li>
<li class="toctree-l3"><a class="reference internal" href="../data-preparation/service/services.html">数据预处理服务</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/performance.html">性能</a></li>
<li class="toctree-l2"><a class="reference internal" href="../data-preparation/exclusion-and-aliasing.html">排除与别名</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../pipelines/index.html"> 数据流管道</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/concepts-design.html"> 概念与设计</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/getting-started.html"> 入门指南</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/studio.html"> CDAP 数据流设计器</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/creating-pipelines.html"> 创建数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/running-pipelines.html"> 运行数据流管道</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugin-management.html"> 插件管理</a></li>
<li class="toctree-l2"><a class="reference internal" href="../pipelines/plugins/index.html"> 插件参考</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/actions/index.html"> Action Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sources/index.html"> Source Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/transforms/index.html"> Transform Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/analytics/index.html"> Analytic Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/sinks/index.html"> Sink Plugins</a><ul class="simple">
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/shared-plugins/index.html"> Shared Plugins</a><ul>
<li class="toctree-l4"><a class="reference internal" href="../pipelines/plugins/shared-plugins/core.html">CoreValidator</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../pipelines/plugins/post-run-plugins/index.html"> Post-run Plugins</a><ul class="simple">
</ul>
</li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../mmds/index.html"> 数据分析</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../mmds/concepts.html"> Concepts</a></li>
<li class="toctree-l2"><a class="reference internal" href="../mmds/feature-gen.html"> Feature Generation</a></li>
<li class="toctree-l2"><a class="reference internal" href="../mmds/modeling.html"> Modeling</a></li>
<li class="toctree-l2"><a class="reference internal" href="../mmds/example.html"> Example</a></li>
</ul>
</li>
</ul>
</nav>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../developer-manual/index.html" rel="nofollow">开发手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../admin-manual/index.html" rel="nofollow">管理手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../integrations/index.html" rel="nofollow">集成手册</a>
      </li>
      <li class="toctree-l1"><a href="../table-of-contents/../../examples-manual/index.html" rel="nofollow">最佳实践</a>
      </li>
    </ul>
  </div></div>
    </div><div class="col-md-8 content" id="main-content">
    
  <div class="section" id="id1">
<h1>示例: 使用客户地址信息分发市场推广材料<a class="headerlink" href="#id1" title="Permalink to this headline">🔗</a></h1>
<div class="section" id="id2">
<h2>简介<a class="headerlink" href="#id2" title="Permalink to this headline">🔗</a></h2>
<p>本教程演示如何使用 CDAP 的数据预处理和数据流管道清理, 准备, 以及存储客户数据到 MySQL 数据库. 你将学习如何从 CDAP 连接数据源, 如何应用基本数据转换, 以及如何将数据写到一个 CDAP 数据集.</p>
</div>
<div class="section" id="id3">
<h2>场景<a class="headerlink" href="#id3" title="Permalink to this headline">🔗</a></h2>
<p>您希望使用客户地址数据来创建用于持续促销的自定义营销材料, 并将其分发到邮箱中.
但是, 您对广告系列设置了两个限制:</p>
<ul class="simple">
<li>它仅针对加利福尼亚, 华盛顿或俄勒冈州的客户</li>
<li>为了节省燃料和金钱, 该广告系列将只发送到位于 Avenue (而不是 Road 或 Court) 的地址, 因为这些地址更容易通过汽车运达</li>
</ul>
</div>
<div class="section" id="id4">
<h2>数据<a class="headerlink" href="#id4" title="Permalink to this headline">🔗</a></h2>
<p>单击下面的按钮下载一个 <cite>.zip</cite> 文件, 其中包含完成本教程所需的全部数据.</p>
<p><a class="reference download internal" download="" href="../_downloads/167961c631054a6d0a7f373a5654089d/campaign-data.zip"><code class="xref download docutils literal notranslate"><span class="pre">Zipfile</span></code></a></p>
</div>
<div class="section" id="id5">
<h2>视频教程<a class="headerlink" href="#id5" title="Permalink to this headline">🔗</a></h2>
<p>(暂未提供)</p>
<!-- ..  youtube:: AzQuoIE-jak -->
</div>
<div class="section" id="id6">
<h2>操作步骤<a class="headerlink" href="#id6" title="Permalink to this headline">🔗</a></h2>
<div class="section" id="id7">
<h3>加载数据<a class="headerlink" href="#id7" title="Permalink to this headline">🔗</a></h3>
<p>首先, 您需要导入客户数据. <code class="docutils literal notranslate"><span class="pre">demo.sql</span></code> 包含客户数据. 在您的 shell 中, 登录 MySQL (对我来说, 使用 <code class="docutils literal notranslate"><span class="pre">mysql</span> <span class="pre">-u</span> <span class="pre">root</span></code>)
并创建一个名为 <code class="docutils literal notranslate"><span class="pre">demo</span></code> 的数据库 (通过运行 <code class="docutils literal notranslate"><span class="pre">CREATE</span> <span class="pre">DATABASE</span> <span class="pre">demo;</span></code>). 然后退出 mysql (<code class="docutils literal notranslate"><span class="pre">exit;</span></code>).</p>
<p>在您的 shell 中, 浏览到 <code class="docutils literal notranslate"><span class="pre">demo.sql</span></code> 文件所在的目录并运行 <code class="docutils literal notranslate"><span class="pre">mysql</span> <span class="pre">-u</span> <span class="pre">root</span> <span class="pre">-p</span> <span class="pre">demo</span> <span class="pre">&lt;</span> <span class="pre">demo.sql</span></code>.
现在, 数据库 <code class="docutils literal notranslate"><span class="pre">demo</span></code> 的表 <code class="docutils literal notranslate"><span class="pre">customer</span></code> 应包含客户数据.</p>
<p>打开 CDAP 并使用顶部栏导航至数据预处理. 在左侧边栏 (如果不可见, 可以通过左上角的箭头访问),
单击 “添加连接.” 选择数据源 “数据库.”</p>
<p>如果安装了 MySQL 驱动程序, 请跳过此步骤. 如果不是, 请退出提示, 然后单击右上角的 “构件市场”.
在左侧菜单栏上, 选择 “驱动程序.” 单击 MySQL JDBC 驱动程序, 然后按照屏幕上的向导安装驱动程序.</p>
<p>在 “添加连接” 提示中, 选择数据库的名称 (这是供您自己参考的名称). 指定 <code class="docutils literal notranslate"><span class="pre">host</span></code> 为 <code class="docutils literal notranslate"><span class="pre">localhost</span></code>,
<code class="docutils literal notranslate"><span class="pre">port</span></code> 为 <code class="docutils literal notranslate"><span class="pre">3306</span></code>, 并正确输入 用户名/密码 (对我来说, 用户名是 <code class="docutils literal notranslate"><span class="pre">root</span></code>, 没有密码).
单击 “测试连接” 以验证连接是否有效, 然后选择数据库 <code class="docutils literal notranslate"><span class="pre">demo</span></code>.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/address_connect.jpeg"><img alt="../_images/address_connect.jpeg" class="bordered-image" src="../_images/address_connect.jpeg" style="width: 500px;" /></a>
</div>
<p>连接到数据库后, 单击您选择的数据库名称, 该名称将在左侧面板的 “数据库” 标题下.
然后选择 <code class="docutils literal notranslate"><span class="pre">customer</span></code> 表. 您应该看到以 行/列 形式显示的客户数据.</p>
<p>接下来, 您需要导入另一个文件, <code class="docutils literal notranslate"><span class="pre">states.json</span></code>. 单击左上角带有白色箭头的灰色表,
然后导航到系统中存储 <code class="docutils literal notranslate"><span class="pre">states.json</span></code> 的位置. 只需单击文件即可上传.</p>
</div>
<div class="section" id="id8">
<h3>缩写州名<a class="headerlink" href="#id8" title="Permalink to this headline">🔗</a></h3>
<p>送货车辆的导航系统仅识别包含缩写州名的地址, 例如 “CA” 而不是 “加利福尼亚.” 但是, 客户数据仅包含完整的州名.
我们刚导入的 <code class="docutils literal notranslate"><span class="pre">states.json</span></code> 文件包含两列: 一列是完整的州名, 一列是州名缩写.
我们可以将其用作映射来更新客户数据中的州名称.</p>
<p>要创建我们的映射, 请打开 <code class="docutils literal notranslate"><span class="pre">states.json</span></code> 标签. 在数据预处理界面中, 选择此选项卡.
使用 <code class="docutils literal notranslate"><span class="pre">Body</span></code> 列旁的插入符图标, 选择 “解析” 以及 “JSON”. 重复操作两次.</p>
<p>操作两次是因为我们必须首先解析数组, 然后解析数组中的每个 JSON 对象. 只需单击标题并输入文本,
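即可将列名从 <code class="docutils literal notranslate"><span class="pre">body_name</span></code> 修改为 <code class="docutils literal notranslate"><span class="pre">name</span></code>, 以及把 <code class="docutils literal notranslate"><span class="pre">body_abbreviation</span></code> 修改为 <code class="docutils literal notranslate"><span class="pre">abbreviation</span></code>.</p>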
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/address_parse_states.jpeg"><img alt="../_images/address_parse_states.jpeg" class="bordered-image" src="../_images/address_parse_states.jpeg" style="width: 500px;" /></a>
</div>
<p>现在, 单击 “创建数据流管道” 然后选择 “批处理”. 您现在位于数据流管道 UI 中了,
您将看到一个 “文件” 阶段的数据流进 “Wrangler” 阶段.
这个 “Wrangler” 阶段代表您刚刚在数据预处理中应用的数据处理指令.</p>
<p>在左侧栏中, 单击 “数据接收器” 然后选择 “CDAP 表数据集” 和 “Avro 时间分区数据集” 插件.
将 “Wrangler” 阶段的输出连接到 “CDAP 表数据集”. 单击 “CDAP 表数据集” 阶段, 并将 “name” 添加到 “行字段” 中.</p>
<p>将数据流管道命名为 “StateNamePipeline”. 然后, 通过单击 “部署” 来部署数据流管道. 通过单击 “运行” 来运行.</p>
<p>您已经创建了一个 CDAP 表数据集, 可用于将客户数据中的州名称从完整版本更新为缩写版本.</p>
</div>
<div class="section" id="id9">
<h3>更新客户数据中的州名称<a class="headerlink" href="#id9" title="Permalink to this headline">🔗</a></h3>
<p>现在, 您可以使用缩写替换完整的州名称. 导航回到数据预处理, 然后选择 <code class="docutils literal notranslate"><span class="pre">customer</span></code> 选项卡.</p>
<p>由于无法对 <code class="docutils literal notranslate"><span class="pre">null</span></code> 州名称进行查找, 因此需要确保没有空的州名称. 为此, 请选择 <code class="docutils literal notranslate"><span class="pre">State</span> <span class="pre">Column</span></code> 左侧的插入符号图标.
导航到 <code class="docutils literal notranslate"><span class="pre">Filter</span></code>, 然后 <code class="docutils literal notranslate"><span class="pre">移除数据行</span></code> 如果 <code class="docutils literal notranslate"><span class="pre">值为空</span></code>, 如下所示.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/address_clean_null.jpeg"><img alt="../_images/address_clean_null.jpeg" class="bordered-image" src="../_images/address_clean_null.jpeg" style="width: 500px;" /></a>
</div>
<p>您现在可以使用 <a class="reference internal" href="../data-preparation/directives/table-lookup.html"><code class="docutils literal notranslate"><span class="pre">table-lookup</span></code></a> 数据处理指令来替换完整的州名称.</p>
<p>在 CDAP 中, 数据处理指令是数据预处理中用于执行转换的命令.
<code class="docutils literal notranslate"><span class="pre">table-lookup</span></code> 数据处理指令是将存储在 CDAP 表中的数据, 映射为另一列中的值的指令.
例如, 您将使用 <code class="docutils literal notranslate"><span class="pre">StateNameTable</span></code> 来查找缩写的州名.</p>
<p>该数据处理指令的形式为 <code class="docutils literal notranslate"><span class="pre">table-lookup</span> <span class="pre">&lt;column&gt;</span> <span class="pre">&lt;table&gt;</span></code>. 对于本示例, <code class="docutils literal notranslate"><span class="pre">column</span></code> 是 <code class="docutils literal notranslate"><span class="pre">state</span></code>, <code class="docutils literal notranslate"><span class="pre">table</span></code> 是 <code class="docutils literal notranslate"><span class="pre">StateNameTable</span></code>.
在屏幕底部的命令提示符中应用完整数据处理指令 (<code class="docutils literal notranslate"><span class="pre">table-lookup</span> <span class="pre">state</span> <span class="pre">StateNameTable</span></code>), 如下图所示.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/address_lookup.jpeg"><img alt="../_images/address_lookup.jpeg" class="bordered-image" src="../_images/address_lookup.jpeg" style="width: 500px;" /></a>
</div>
<p>您将看到一个新列, <code class="docutils literal notranslate"><span class="pre">state_abbreviation</span></code>, 出现了.</p>
<p>在屏幕底部的命令提示符中输入数据处理指令的应用方式与通过每一列的下拉菜单应用数据处理指令的原理是一样的.
实际上, 例如, 当您从下拉菜单中选择过滤器时, 数据预处理自动生成并应用等效数据处理指令.
您可以通过在右侧边栏中选择 <code class="docutils literal notranslate"><span class="pre">Directives</span></code> 来查看. 单击数据处理指令旁边的 “x” 可以删除相应的转换.</p>
<p>由于您不再需要完整的州名称, 因此可以删除此列. 选择 <code class="docutils literal notranslate"><span class="pre">state</span></code> 左侧的插入符号, 然后选择 <code class="docutils literal notranslate"><span class="pre">删除数据列</span></code> 选项.
此外, 您还可以重命名 <code class="docutils literal notranslate"><span class="pre">state_abbreviation</span></code>. 双击列名称, 文本将变为可编辑状态. 将其替换为 “State.”</p>
</div>
<div class="section" id="id10">
<h3>选择正确的州<a class="headerlink" href="#id10" title="Permalink to this headline">🔗</a></h3>
<p>您只希望您的广告系列定位到太平洋沿岸的消费者: 加利福尼亚州, 俄勒冈州和华盛顿州.
因此, 你需要删除 <code class="docutils literal notranslate"><span class="pre">state</span></code> 列中的值不是 <code class="docutils literal notranslate"><span class="pre">CA</span></code>, <code class="docutils literal notranslate"><span class="pre">OR</span></code>, 或 <code class="docutils literal notranslate"><span class="pre">WA</span></code> 的行.</p>
<p>为此, 请导航至州名称左侧的插入符号图标. 选择此插入符, 然后选择 <code class="docutils literal notranslate"><span class="pre">过滤器</span></code>. 选择 <code class="docutils literal notranslate"><span class="pre">保留数据行</span></code>,
然后使用下拉菜单选择 <code class="docutils literal notranslate"><span class="pre">如果值匹配正则表达式</span></code>.</p>
<p>你需要你的正则表达式来匹配 <code class="docutils literal notranslate"><span class="pre">CA</span></code>, <code class="docutils literal notranslate"><span class="pre">OR</span></code>, 或 <code class="docutils literal notranslate"><span class="pre">WA</span></code>. 正则表达式 ^(CA|OR|WA)$ 可满足这个要求, 如下所示.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/address_regex.jpeg"><img alt="../_images/address_regex.jpeg" class="bordered-image" src="../_images/address_regex.jpeg" style="width: 500px;" /></a>
</div>
<p>应用过滤器, 只有符合您期望条件的数据行保留下来.</p>
</div>
<div class="section" id="id11">
<h3>选择正确的街道类型<a class="headerlink" href="#id11" title="Permalink to this headline">🔗</a></h3>
<p>因为您认为仅发送到位于 avenue 的地址会更省油和节省成本 (这些路线位于中心位置),
所以您只想保留包含 “Avenue” 一词的地址.</p>
<p>这项任务并不像乍看起来那样简单. 对于州, 您可以只过滤州名称, 因为该列中没有其他文本.
但是, 街道地址示例如下:</p>
<p><code class="docutils literal notranslate"><span class="pre">61</span> <span class="pre">Summit</span> <span class="pre">Avenue</span></code></p>
<p>这意味着您不能简单地使用要求列值等于单词 “Avenue” 的过滤器.</p>
<p>要解决此问题, 我们将使用 <code class="docutils literal notranslate"><span class="pre">包含</span></code> 特性. 在 address 数据列中选择插入符号,
然后选择 <code class="docutils literal notranslate"><span class="pre">过滤器</span></code> 以及 <code class="docutils literal notranslate"><span class="pre">保留数据行如果包含</span></code>.
输入 <code class="docutils literal notranslate"><span class="pre">Avenue</span></code>. 另外, 选择 <code class="docutils literal notranslate"><span class="pre">忽略大小写</span></code>. 应用过滤器. 简单!</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/address_street_type.jpeg"><img alt="../_images/address_street_type.jpeg" class="bordered-image" src="../_images/address_street_type.jpeg" style="width: 500px;" /></a>
</div>
<p>您将看到仅剩一个客户. 起初, 您可能会感到震惊. 但是, 数据预处理仅显示数据集中的前 100 行数据.
这是因为数据预处理只是一个试验场, 用于在对整个数据集调度大规模并行处理任务之前, 查看转换对一小部分数据的影响.</p>
</div>
<div class="section" id="id12">
<h3>最后步骤: 清理数据<a class="headerlink" href="#id12" title="Permalink to this headline">🔗</a></h3>
<p>在将数据写入数据集之前, 还需要最后一步, 以确保数据已清理完毕并准备就绪, 可供导航系统访问.</p>
<p>您的导航系统不需要国家名称, 因此该 <code class="docutils literal notranslate"><span class="pre">country</span></code> 数据列没有用. 选择 <code class="docutils literal notranslate"><span class="pre">country</span></code> 数据列旁边的插入符号,
然后选择 <code class="docutils literal notranslate"><span class="pre">删除数据列</span></code>. 您应该看到包含值 <code class="docutils literal notranslate"><span class="pre">USA</span></code> 的这个列消失了.</p>
<p>现在数据已准备就绪, 可以将其写入数据集了.</p>
</div>
<div class="section" id="id13">
<h3>写入数据集<a class="headerlink" href="#id13" title="Permalink to this headline">🔗</a></h3>
<p>最后一步是将干净的数据写入数据集. 数据预处理只选择一小部分数据 (100 条记录) 进行转换,
而数据流管道则运行 Spark 或 MapReduce 作业来在集群上并行化这些相同的转换.
这使您可以非常快速地对大量数据进行复杂的转换.</p>
<p>单击 <code class="docutils literal notranslate"><span class="pre">创建数据流管道</span></code>, 然后选择 <code class="docutils literal notranslate"><span class="pre">批处理数据流</span></code>. 您想要批处理, 因为您的 MySQL 数据库不是实时数据源.</p>
<p>在数据流管道 UI 中, 您将看到一个数据库 (带有 <code class="docutils literal notranslate"><span class="pre">customer</span></code> 批注) 阶段连接进入 Wrangler 阶段. Wrangler 阶段包含所有您在数据预处理中应用的转换.</p>
<p>导航到左侧栏的 “数据接收器” 部分, 然后选择 <code class="docutils literal notranslate"><span class="pre">Avro</span> <span class="pre">时间分区数据集</span></code> 插件. 连接 Wrangler 阶段的输出到这个阶段.
双击 <code class="docutils literal notranslate"><span class="pre">Avro</span> <span class="pre">时间分区数据集</span></code> 阶段, 将其命名为 <code class="docutils literal notranslate"><span class="pre">CampaignSink</span></code>. 同样, 将您的数据流管道命名为 <code class="docutils literal notranslate"><span class="pre">CampaignPipeline</span></code>.</p>
<p>现在, 您应该能够部署你的数据流管道了. 单击右上角 <code class="docutils literal notranslate"><span class="pre">发布</span></code> 按钮. 部署完成后, 点击 <code class="docutils literal notranslate"><span class="pre">运行</span></code>.</p>
<p>数据流管道运行后, 双击您的 <code class="docutils literal notranslate"><span class="pre">Avro</span> <span class="pre">时间分区数据集</span></code> 阶段. 在弹出的菜单中, 您将看到按钮 <code class="docutils literal notranslate"><span class="pre">查看详细</span></code>.
单击此按钮后, 选择 “眼睛” 图标. 执行在字段中预先填充好的 SQL 查询.
您将看到下面显示一个 SQL 查询结果. 单击此查询旁边的 “眼睛”, 您将看到数据流管道结果.</p>
<div class="figure align-center" style="width: 100%">
<a class="bordered-image reference internal image-reference" href="../_images/address_results.jpeg"><img alt="../_images/address_results.jpeg" class="bordered-image" src="../_images/address_results.jpeg" style="width: 500px;" /></a>
</div>
<p>现在, 准备好的数据已存储在 <code class="docutils literal notranslate"><span class="pre">CampaignSink</span></code> 数据集中, 并且可以通过 RESTful 接口 或 CDAP UI 直接访问.</p>
</div>
</div>
</div>

</div>
    <div class="col-md-2">
      <div id="right-sidebar" class="bs-sidenav scrollable-y" role="complementary">
        <div id="localtoc-scrollspy">
        </div>
      </div>
    </div></div>
</div>
<!-- block main content end -->
<!-- block footer -->
<footer class="footer">
      <div class="container">
        <div class="row">
          <div class="col-md-2 footer-left"><a title="CDAP 入门指南" href="index.html">Previous</a></div>
          <div class="col-md-8 footer-center"><a class="footer-tab-link" href="../table-of-contents/../../reference-manual/licenses/index.html">Copyright</a> &copy; 2014-2020 Cask Data, Inc.&bull; <a class="footer-tab-link" href="//docs.cask.co/cdap/6.1.1/cdap-docs-6.1.1-web.zip" rel="nofollow">Download</a> an archive or
<a class="footer-tab-link" href="//docs.cask.co/cdap">switch the version</a> of the documentation
          </div>
          <div class="col-md-2 footer-right"><a title="示例: 使用纽约时报 XML 数据推送" href="nytimes-xml.html">Next</a></div>
        </div>
      </div>
    </footer>
<!-- block footer end -->
<script type="text/javascript" src="../_static/bootstrap-3.3.6/js/bootstrap.min.js"></script><script type="text/javascript" src="../_static/js/bootstrap-sphinx.js"></script><script type="text/javascript" src="../_static/js/abixTreeList-2.js"></script><script type="text/javascript" src="../_static/js/cdap-dynamicscrollspy-4.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script><script type="text/javascript" src="../_static/js/copy-to-clipboard.js"></script><script type="text/javascript" src="../_static/js/jquery.mousewheel.min.js"></script><script type="text/javascript" src="../_static/js/jquery.mCustomScrollbar.js"></script><script type="text/javascript" src="../_static/js/js.cookie.js"></script><script type="text/javascript" src="../_static/js/tabbed-parsed-literal-0.2.js"></script><script type="text/javascript" src="../_static/js/cdap-onload-javascript.js"></script><script type="text/javascript" src="../_static/js/cdap-version-menu.js"></script>
    <script src="https://cdap.gitee.io/docs/cdap/json-versions.js"></script>
  </body>
</html>